#!/usr/bin/env python3
import argparse, re, shutil, subprocess, sys, tempfile
from typing import List, Optional, Tuple

def parse_args() -> argparse.Namespace:
    """ Parse command line arguments

    Reads the arguments from the process command line (argparse default).

    Returns:
        A namespace with the attributes `input`, `output1`, `output2`,
        `split_at_bookmark` (all strings) and `delete_bookmark` (bool,
        False unless --delete-bookmark is given).
    """
    # A single parser carries both texts: the description says what the tool
    # does and the epilog states the external requirement. Building the parser
    # twice would throw the first description away.
    parser = argparse.ArgumentParser(
        description='Split a PDF into two at a specific PDF bookmark, preserving other PDF bookmarks.',
        epilog='REQUIRES: pdftk installed and available on the command line path.',
    )
    parser.add_argument('-i', '--input', required=True, type=str, help='The input PDF file to split in two')
    parser.add_argument('-o1', '--output1', required=True, type=str, help='The first output PDF file')
    parser.add_argument('-o2', '--output2', required=True, type=str, help='The second output PDF file')
    parser.add_argument('--split-at-bookmark', required=True, type=str, help='The name of the bookmark at which to split the PDF')
    parser.add_argument('--delete-bookmark', action='store_true', help='Omit the specified bookmark from the output PDFs')
    return parser.parse_args()

def pdf_detect_bookmark(input_file: str, bookmark_name: str) -> Tuple[str, int]:
    """ Find the page number of a named bookmark in a PDF

    Runs `pdftk <input_file> dump_data` and searches its output for a
    `BookmarkTitle` line with exactly the given title, then reads the page
    number from the two lines that follow it.

    Args:
        input_file: Path of the PDF to inspect.
        bookmark_name: Title of the bookmark, matched literally.

    Returns:
        A tuple of (the complete dump_data output, the bookmark page number).

    Raises:
        RuntimeError: pdftk exited with a non-zero code.
        ValueError: The bookmark was not found, or no page number line
            follows its title.
    """
    pdf_metadata = subprocess.run(['pdftk', input_file, 'dump_data'], capture_output=True, text=True)
    if pdf_metadata.returncode != 0:
        # Both streams are captured; show stderr too so diagnostics are not lost
        print(pdf_metadata.stdout)
        print(pdf_metadata.stderr, file=sys.stderr)
        raise RuntimeError(f'pdftk returned non-zero exit code {pdf_metadata.returncode}')

    # Search for the bookmark specified by the user and the next two lines.
    # The title is escaped so that characters such as "(", "+" or "." in it
    # are matched literally instead of being interpreted as regex syntax.
    match = re.search(
        rf'^BookmarkTitle: {re.escape(bookmark_name)}\n.*\n.*',
        pdf_metadata.stdout,
        flags=re.MULTILINE,
    )
    if not match:
        raise ValueError(f'Could not find PDF bookmark "{bookmark_name}" in {input_file}')

    # Find the page number among the matched lines
    for line in match.group(0).split('\n'):
        if line.startswith('BookmarkPageNumber: '):
            split_at_page = int(line.split(': ')[1])
            print(f'Detected PDF bookmark "{bookmark_name}" on page {split_at_page}')
            return pdf_metadata.stdout, split_at_page

    # Without this the function would fall through and return None, which the
    # caller cannot unpack into (metadata, page).
    raise ValueError(f'Could not find a page number for PDF bookmark "{bookmark_name}" in {input_file}')
    
def pdf_metadata_split_bookmarks(input_metadata: str, split_at_page: int, delete_bookmark: Optional[str] = None) -> Tuple[str, str]:
    """ Divide the bookmark records of a metadata dump between two output PDFs

    A record starts at a `BookmarkBegin` line and ends at the next
    `BookmarkPageNumber: <n>` line. Records whose page is before
    `split_at_page` go to the first output unchanged; all others go to the
    second output with the page number rebased so that `split_at_page`
    becomes page 1. Lines outside bookmark records are ignored.

    Args:
        input_metadata: Newline separated metadata text.
        split_at_page: First page belonging to the second output.
        delete_bookmark: When given, a record on `split_at_page` whose second
            line is `BookmarkTitle: <delete_bookmark>` is dropped.

    Returns:
        A tuple (bookmarks for the first output, bookmarks for the second
        output), each a newline joined string with bookmark levels normalized.
    """
    before_split: List[str] = []
    from_split: List[str] = []

    # None while outside a record, otherwise the lines collected so far
    pending: Optional[List[str]] = None

    for entry in input_metadata.split('\n'):
        # Outside a record: only a BookmarkBegin line is of interest
        if pending is None:
            if re.match('^BookmarkBegin$', entry):
                pending = [entry]
            continue

        # Inside a record: keep collecting until the page number line
        if not re.match('^BookmarkPageNumber: [0-9]+', entry):
            pending.append(entry)
            continue

        # The page number line closes the record
        page = int(entry.split(': ')[1])
        record, pending = pending, None

        # Optionally drop the bookmark the PDF is being split at
        if page == split_at_page and delete_bookmark and record[1] == f'BookmarkTitle: {delete_bookmark}':
            continue

        if page < split_at_page:
            before_split.extend(record + [entry])
        else:
            rebased = f'BookmarkPageNumber: {page - split_at_page + 1}'
            from_split.extend(record + [rebased])

    # Ensure minimum bookmark level is 1 in each half
    first_half = pdf_metadata_bookmarklevel_normalize(before_split)
    second_half = pdf_metadata_bookmarklevel_normalize(from_split)

    return '\n'.join(first_half), '\n'.join(second_half)

def pdf_metadata_bookmarklevel_normalize(bookmarks: List[str]) -> List[str]:
    """ Shift all bookmark levels so that the smallest level becomes 1

    Args:
        bookmarks: Metadata lines; only lines starting with
            `BookmarkLevel: <digits>` are considered.

    Returns:
        The same list object, with its `BookmarkLevel` lines rewritten in
        place. A list without any level line is returned untouched.
    """
    level_pattern = re.compile(r'^BookmarkLevel: ([0-9]+)')

    # Parse every level line once, remembering where it sits in the list
    levels = {}
    for index, line in enumerate(bookmarks):
        found = level_pattern.match(line)
        if found:
            levels[index] = int(found.group(1))

    # Test for "no level lines" explicitly: a truthiness test on the minimum
    # would also skip normalization when the minimum level is 0.
    if not levels:
        return bookmarks

    # Ensure minimum bookmark level is 1
    shift = min(levels.values()) - 1
    for index, level in levels.items():
        bookmarks[index] = f'BookmarkLevel: {level - shift}'

    return bookmarks

def pdf_split_at_page(input_file: str, output_file1: str, output_file2: str, split_at_page: int) -> None:
    """ Split a PDF into two files with `pdftk cat`

    Pages 1 to `split_at_page - 1` are written to `output_file1` and pages
    `split_at_page` to the end are written to `output_file2`.

    Args:
        input_file: Path of the PDF to split.
        output_file1: Path of the PDF receiving the leading pages.
        output_file2: Path of the PDF receiving the remaining pages.
        split_at_page: First page of the second output (1-based).

    Raises:
        ValueError: `split_at_page` is less than 2, so the first output would
            contain no pages.
        RuntimeError: pdftk exited with a non-zero code.
    """
    # Splitting at page 1 would request the page range "1-0"; reject it here
    # with a clear message before pdftk is invoked.
    if split_at_page < 2:
        raise ValueError(
            f'Cannot split {input_file} at page {split_at_page}: '
            'the first output PDF would contain no pages'
        )

    jobs = (
        (f'1-{split_at_page - 1}', output_file1),
        (f'{split_at_page}-end', output_file2),
    )
    for page_range, output_file in jobs:
        result = subprocess.run(['pdftk', input_file, 'cat', page_range, 'output', output_file], capture_output=True, text=True)
        if result.returncode != 0:
            # Both streams are captured; show stderr too so diagnostics are not lost
            print(result.stdout)
            print(result.stderr, file=sys.stderr)
            raise RuntimeError(f'pdftk returned non-zero exit code {result.returncode}')
        print(f'Created {output_file} containing pages {page_range} of {input_file}')

def pdf_insert_bookmarks(input_file: str, bookmarks: str) -> None:
    """ Insert bookmark records into a PDF that has none, modifying it in place

    The metadata of `input_file` is dumped with `pdftk dump_data`, the
    bookmark records are placed ahead of the first `PageMediaBegin` text in
    the dump (or at the end when the dump has none), and the combined
    metadata is applied with `pdftk update_info`. The updated PDF is then
    copied over `input_file`.

    Args:
        input_file: Path of the PDF to modify.
        bookmarks: Newline separated bookmark records; may be empty.

    Raises:
        ValueError: `input_file` already contains bookmarks.
        RuntimeError: pdftk exited with a non-zero code.
    """
    # Obtain metadata for the PDF to be modified
    input_metadata = subprocess.run(['pdftk', input_file, 'dump_data'], capture_output=True, text=True)
    if input_metadata.returncode != 0:
        # Both streams are captured; show stderr too so diagnostics are not lost
        print(input_metadata.stdout)
        print(input_metadata.stderr, file=sys.stderr)
        raise RuntimeError(f'pdftk returned non-zero exit code {input_metadata.returncode}')

    # Verify that there are no bookmarks in the input PDF
    if 'BookmarkBegin' in input_metadata.stdout:
        raise ValueError(f'{input_file} already contains bookmarks')

    # Split the metadata into before/after the bookmark position. partition()
    # yields an empty marker when the dump has no PageMediaBegin, in which
    # case the bookmarks simply go last instead of indexing a missing part.
    head, marker, tail = input_metadata.stdout.partition('PageMediaBegin')
    candidate_sections = (
        head.rstrip('\n'),
        bookmarks,
        marker + tail.rstrip('\n') if marker else '',
    )
    # Drop empty sections so no blank lines end up in the metadata file
    sections = [section for section in candidate_sections if section]

    # Insert the bookmarks into the metadata
    with tempfile.NamedTemporaryFile(mode='w', delete=True) as temp_metadata, tempfile.NamedTemporaryFile(mode='w', delete=True) as temp_pdf:
        temp_metadata.write('\n'.join(sections))
        temp_metadata.write('\n')

        # pdftk reads the file by name, so the buffered text must be on disk
        temp_metadata.flush()
        updated_pdf = subprocess.run(['pdftk', input_file, 'update_info', temp_metadata.name, 'output', temp_pdf.name], capture_output=True, text=True)

        if updated_pdf.returncode != 0:
            print(updated_pdf.stdout)
            print(updated_pdf.stderr, file=sys.stderr)
            raise RuntimeError(f'pdftk returned non-zero exit code {updated_pdf.returncode}')

        shutil.copy2(temp_pdf.name, input_file)
        print(f'Inserted PDF bookmarks into {input_file}')

def main() -> int:
    """ Run the split: locate the bookmark, split the PDF, restore bookmarks

    Returns:
        0 once every step has completed; errors propagate as exceptions.
    """
    options = parse_args()

    # Dump the input metadata and locate the page to split at
    metadata, page = pdf_detect_bookmark(input_file=options.input, bookmark_name=options.split_at_bookmark)

    # Only name a bookmark for removal when --delete-bookmark was given
    title_to_drop = options.split_at_bookmark if options.delete_bookmark else None
    first_bookmarks, second_bookmarks = pdf_metadata_split_bookmarks(
        input_metadata=metadata,
        split_at_page=page,
        delete_bookmark=title_to_drop,
    )

    # Write the two page ranges to the output files
    pdf_split_at_page(
        input_file=options.input,
        output_file1=options.output1,
        output_file2=options.output2,
        split_at_page=page,
    )

    # Put each half's bookmarks into its output file, first output first
    outputs = ((options.output1, first_bookmarks), (options.output2, second_bookmarks))
    for target, records in outputs:
        pdf_insert_bookmarks(input_file=target, bookmarks=records)

    return 0

# Run main() only when executed as a script; its return value is the exit status
if __name__ == '__main__':
    raise SystemExit(main())
